*-------------------------------------------------------------------------------
*						Data Pre-Process
*-------------------------------------------------------------------------------

** Set Path
* Every project folder hangs off one root, so relocating the project means editing one line.
global Proj_root    "G:\project-finished\Descriptive"
global Raw_data     "$Proj_root\Data"
global App_data     "$Proj_root\Appendix Data"
global Class_data   "$Proj_root\Classification"
global Work_lab     "$Proj_root\Lab"
global Out_lab      "$Proj_root\Out"

* Intermediate .dta files below are saved with relative names, so they land in this folder.
cd "$Work_lab"

* Close any log left open by an earlier run, then start a fresh one in the output folder.
capture log close
log using "$Out_lab\Pre-onet", replace
set more off

**------------------------------------------------------------------------------
* Step1: Generate Data
*     (1)CHN-consistent & Characteristics different version
**------------------------------------------------------------------------------
*    Take the physical measure as the example
*   Measure 6: Physical Skills
*		(a)	Work Requirements: Generalized Work Activities: Performing General Physical Activities
*			general physical activities
*		(b) Work Requirements: Generalized Work Activities:  Handling and Moving Objects
*			handling and moving objects

*version3.0, 5.0, 15.1, 20, 25
* The same pipeline is applied to every release; only the workbook, the sheet name
* and (for release 3.0) the column headers differ. The tag after "file"/"sheet" is
* the suffix used in the saved file name (version<tag>_skills_physical.dta).
local wa_file3     "$App_data\ONET N_version\onet30ac\workactivity.xlsx"
local wa_sheet3    "workactivity"
local wa_file5     "$App_data\ONET N_version\db_50\db_50\WorkActivity.xlsx"
local wa_sheet5    "WorkActivity"
local wa_file151   "$App_data\ONET N_version\onet151ac\Work Activities.xlsx"
local wa_sheet151  "Work_Activities"
local wa_file20    "$App_data\ONET N_version\onet20ac\Work Activities.xlsx"
local wa_sheet20   "Work_Activities"
local wa_file25    "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx"
local wa_sheet25   "Work Activities"

foreach v in 3 5 151 20 25 {
   import excel "`wa_file`v''", sheet("`wa_sheet`v''") firstrow case(lower) clear

   * The release 3.0 workbook uses underscore-style headers; align them with the later releases.
   if "`v'" == "3" {
      rename (element_name scale_id o_net_soc_code data_value) (elementname scaleid onetsoccode datavalue)
   }

   tab elementname
   keep if inlist(elementname, "Performing General Physical Activities", "Handling and Moving Objects")
   tab scaleid
   keep if scaleid == "IM"  // keep the importance (IM) scale, for consistency with the other measures

   * Short element names become the suffixes of the wide variables created by reshape.
   replace elementname = "Physical" if elementname == "Performing General Physical Activities"
   replace elementname = "Objects"  if elementname == "Handling and Moving Objects"
   keep onetsoccode elementname datavalue
   reshape wide datavalue, i(onetsoccode) j(elementname) string

   * physical1 = general physical activities, physical2 = handling and moving objects
   rename (datavalueObjects datavaluePhysical) (physical2 physical1)
   save "version`v'_skills_physical.dta", replace
}
   
   
   
*   Measure 5:  Cognitive Skills
*		(a)	Abilities: written comprehension
*			written comprehension
*		(b) Abilities: Mathematical Reasoning
*			mathematical reasoning ability
*		(c) Abilities: deductive reasoning
*			deductive reasoning
*		(d) Abilities: inductive reasoning
*			inductive reasoning
*-------------------------------------------------------------------------------
*version3.0, 5.0, 15.1, 20.0, 25.0
* One pass per release; only the workbook, the sheet name and (for release 3.0)
* the column headers differ. Output: version<tag>_skills_cognitive.dta.
local ab_file3     "$App_data\ONET N_version\onet30ac\ability.xlsx"
local ab_sheet3    "ability"
local ab_file5     "$App_data\ONET N_version\db_50\db_50\Ability.xlsx"
local ab_sheet5    "Ability"
local ab_file151   "$App_data\ONET N_version\onet151ac\Abilities.xlsx"
local ab_sheet151  "Abilities"
local ab_file20    "$App_data\ONET N_version\onet20ac\Abilities.xlsx"
local ab_sheet20   "Abilities"
local ab_file25    "$App_data\ONET N_version\db_25_0_excel\Abilities.xlsx"
local ab_sheet25   "Abilities"

foreach v in 3 5 151 20 25 {
   import excel "`ab_file`v''", sheet("`ab_sheet`v''") firstrow case(lower) clear

   * The release 3.0 workbook uses underscore-style headers; align them with the later releases.
   if "`v'" == "3" {
      rename (element_name scale_id o_net_soc_code data_value) (elementname scaleid onetsoccode datavalue)
   }

   tab elementname
   keep if inlist(elementname, "Written Comprehension", "Mathematical Reasoning", "Deductive Reasoning", "Inductive Reasoning")
   tab scaleid
   keep if scaleid == "IM"  // keep the importance (IM) scale, for consistency with the other measures

   * Remove the spaces so the element names can serve as variable-name suffixes in reshape.
   replace elementname = subinstr(elementname, " ", "", .)
   keep onetsoccode elementname datavalue
   reshape wide datavalue, i(onetsoccode) j(elementname) string

   * Cognitive1 = deductive, Cognitive2 = inductive, Cognitive3 = mathematical, Cognitive4 = written comprehension
   rename (datavalueDeductiveReasoning datavalueInductiveReasoning datavalueMathematicalReasoning datavalueWrittenComprehension) (Cognitive1 Cognitive2 Cognitive3 Cognitive4)
   save "version`v'_skills_cognitive.dta", replace
}
   
*   Measure 1:  Competition
*			Work Contexts: Competition: Level of Competition
*			Based on the question “How competitive is your current job?”
*-------------------------------------------------------------------------------   
*version3.0 do not record the "level of competition"


*version5.0, 15.1, 20, 25
* One pass per release; only the workbook, the sheet name and (for release 20)
* the column headers differ. Output: version<tag>_skills_competition.dta.
local wc_file5     "$App_data\ONET N_version\db_50\db_50\WorkContext.xlsx"
local wc_sheet5    "WorkContext"
local wc_file151   "$App_data\ONET N_version\onet151ac\Work Context.xlsx"
local wc_sheet151  "Work_Context"
local wc_file20    "$App_data\ONET N_version\onet20ac\Work Context.xlsx"
local wc_sheet20   "Work_Context"
local wc_file25    "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx"
local wc_sheet25   "Work Context"

foreach v in 5 151 20 25 {
   import excel "`wc_file`v''", sheet("`wc_sheet`v''") firstrow case(lower) clear

   * The release 20 Work Context workbook uses underscore-style headers; align them with the others.
   if "`v'" == "20" {
      rename (element_name scale_id o_net_soc_code data_value) (elementname scaleid onetsoccode datavalue)
   }

   tab elementname
   keep if elementname == "Level of Competition"
   tab scaleid
   * The filter is on the CX scale, not IM. NOTE(review): the earlier comment said
   * "we take importance"; CX is presumably the Work Context rating scale - confirm.
   keep if scaleid == "CX"

   * Remove the spaces so the element name can serve as a variable-name suffix in reshape.
   replace elementname = subinstr(elementname, " ", "", .)
   keep onetsoccode elementname datavalue
   reshape wide datavalue, i(onetsoccode) j(elementname) string

   rename datavalueLevelofCompetition Competition
   save "version`v'_skills_competition.dta", replace
}
   
   
*   Measure 4:  Interactional Skills
*		(a)	Work Context: Contact With Others
*			“How much contact with others (by telephone, face-to-face, or otherwise) 
*			is required to performance your current job?”
*		(b) Work Context: Work With Work Group or Team
*			“How important are interactions that require you to work with or 
*			contribute to a work group or team to perform your current job?” 
*		(c) Work Requirements: Generalized Work Activities: Establishing and Maintaining Interpersonal Relationships
*			“How important is establishing and maintaining interpersonal 
*			relationships to the performance of your current job?”
*		(d) Skills: Social Perceptiveness
*			“How important is social perceptiveness to the performance of your current job?”
*-------------------------------------------------------------------------------   	
*version3.0  do not record the "Contact With Others" “Work With Work Group or Team”
   

*version5.0, 15.1, 20.0, 25.0
* For every release three workbooks are processed:
*   Work Context    -> interact1, interact2  (version<tag>_skills_interact1_2.dta)
*   Work Activities -> interact3             (version<tag>_skills_interact3.dta)
*   Skills          -> interact4             (version<tag>_skills_interact4.dta)
* Only the workbook/sheet names and (release 20 Work Context) the headers differ.
local wc_file5     "$App_data\ONET N_version\db_50\db_50\WorkContext.xlsx"
local wc_sheet5    "WorkContext"
local wa_file5     "$App_data\ONET N_version\db_50\db_50\WorkActivity.xlsx"
local wa_sheet5    "WorkActivity"
local sk_file5     "$App_data\ONET N_version\db_50\db_50\Skills.xlsx"

local wc_file151   "$App_data\ONET N_version\onet151ac\Work Context.xlsx"
local wc_sheet151  "Work_Context"
local wa_file151   "$App_data\ONET N_version\onet151ac\Work Activities.xlsx"
local wa_sheet151  "Work_Activities"
local sk_file151   "$App_data\ONET N_version\onet151ac\Skills.xlsx"

local wc_file20    "$App_data\ONET N_version\onet20ac\Work Context.xlsx"
local wc_sheet20   "Work_Context"
local wa_file20    "$App_data\ONET N_version\onet20ac\Work Activities.xlsx"
local wa_sheet20   "Work_Activities"
local sk_file20    "$App_data\ONET N_version\onet20ac\Skills.xlsx"

local wc_file25    "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx"
local wc_sheet25   "Work Context"
local wa_file25    "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx"
local wa_sheet25   "Work Activities"
local sk_file25    "$App_data\ONET N_version\db_25_0_excel\Skills.xlsx"

foreach v in 5 151 20 25 {

   * --- (a)+(b) Work Context: Contact With Others / Work With Work Group or Team ---
   import excel "`wc_file`v''", sheet("`wc_sheet`v''") firstrow case(lower) clear

   * The release 20 Work Context workbook uses underscore-style headers; align them with the others.
   if "`v'" == "20" {
      rename (element_name scale_id o_net_soc_code data_value) (elementname scaleid onetsoccode datavalue)
   }

   tab elementname
   keep if inlist(elementname, "Contact With Others", "Work With Work Group or Team")
   tab scaleid
   * The filter is on the CX scale, not IM. NOTE(review): the earlier comment said
   * "we take importance"; CX is presumably the Work Context rating scale - confirm.
   keep if scaleid == "CX"

   * Remove the spaces so the element names can serve as variable-name suffixes in reshape.
   replace elementname = subinstr(elementname, " ", "", .)
   keep onetsoccode elementname datavalue
   reshape wide datavalue, i(onetsoccode) j(elementname) string

   rename (datavalueContactWithOthers datavalueWorkWithWorkGrouporTeam) (interact1 interact2)
   label var interact1 "Interactional Skills:Contact With Others"
   label var interact2 "Interactional Skills:Work With Work Group or Team"
   save "version`v'_skills_interact1_2.dta", replace

   * --- (c) Work Activities: Establishing and Maintaining Interpersonal Relationships ---
   import excel "`wa_file`v''", sheet("`wa_sheet`v''") firstrow case(lower) clear
   tab elementname
   keep if elementname == "Establishing and Maintaining Interpersonal Relationships"
   tab scaleid
   keep if scaleid == "IM"  // keep the importance (IM) scale, for consistency with the other measures

   * Reduce to one row per occupation (mean of the remaining ratings).
   collapse (mean) datavalue, by(onetsoccode)

   rename datavalue interact3
   label var interact3 "Interactional Skills: Establishing and Maintaining Interpersonal Relationships"
   save "version`v'_skills_interact3.dta", replace

   * --- (d) Skills: Social Perceptiveness ---
   import excel "`sk_file`v''", sheet("Skills") firstrow case(lower) clear
   tab elementname
   keep if elementname == "Social Perceptiveness"
   tab scaleid
   keep if scaleid == "IM"  // Social Perceptiveness has IM and LV ratings; IM is taken, as in CP2017

   * Reduce to one row per occupation (mean of the remaining ratings).
   collapse (mean) datavalue, by(onetsoccode)

   rename datavalue interact4
   label var interact4 "Interactional Skills:Social Perceptiveness"
   save "version`v'_skills_interact4.dta", replace
}
 
  
*   Measure 8: Routineness 
*       source: Acemoglu & Autor 2011
*       Skills, Tasks and Technologies: Implications for Employment and Earnings
*		Routine cognitive
*			4.C.3.b.7 Importance of repeating the same tasks (work context)
*			4.C.3.b.4 Importance of being exact or accurate (work context)
*			4.C.3.b.8 Structured v. Unstructured work (reverse) (work context)
*		Routine manual
*			4.C.3.d.3 Pace determined by speed of equipment (work context)
*			4.A.3.a.3 Controlling machines and processes (work activity)
*			4.C.2.d.1.i Spend time making repetitive motions (work context)
*-------------------------------------------------------------------------------
*version3.0 records "Importance of repeating the same tasks", but on a different scale
   

*version5.0, 15.1, 20.0, 25.0
* For every release two workbooks are processed:
*   Work Context    -> routinecogni1-3, routinemanual1, routinemanual3  (version<tag>_skills_routine1.dta)
*   Work Activities -> routinemanual2                                   (version<tag>_skills_routine2.dta)
* Only the workbook/sheet names and (release 20 Work Context) the headers differ.
local wc_file5     "$App_data\ONET N_version\db_50\db_50\WorkContext.xlsx"
local wc_sheet5    "WorkContext"
local wa_file5     "$App_data\ONET N_version\db_50\db_50\WorkActivity.xlsx"
local wa_sheet5    "WorkActivity"

local wc_file151   "$App_data\ONET N_version\onet151ac\Work Context.xlsx"
local wc_sheet151  "Work_Context"
local wa_file151   "$App_data\ONET N_version\onet151ac\Work Activities.xlsx"
local wa_sheet151  "Work_Activities"

local wc_file20    "$App_data\ONET N_version\onet20ac\Work Context.xlsx"
local wc_sheet20   "Work_Context"
local wa_file20    "$App_data\ONET N_version\onet20ac\Work Activities.xlsx"
local wa_sheet20   "Work_Activities"

local wc_file25    "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx"
local wc_sheet25   "Work Context"
local wa_file25    "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx"
local wa_sheet25   "Work Activities"

foreach v in 5 151 20 25 {

   * --- Work Context items ---
   import excel "`wc_file`v''", sheet("`wc_sheet`v''") firstrow case(lower) clear

   * The release 20 Work Context workbook uses underscore-style headers; align them with the others.
   if "`v'" == "20" {
      rename (element_name scale_id) (elementname scaleid)
      rename (o_net_soc_code data_value) (onetsoccode datavalue)
   }

   tab elementname
   keep if inlist(elementname, "Importance of Repeating Same Tasks", "Importance of Being Exact or Accurate", "Spend Time Making Repetitive Motions", "Pace Determined by Speed of Equipment", "Structured versus Unstructured Work")
   tab scaleid
   * The filter is on the CX scale, not IM. NOTE(review): the earlier comment said
   * "we take importance"; CX is presumably the Work Context rating scale - confirm.
   keep if scaleid == "CX"

   * Reverse-code "Structured versus Unstructured Work" (x -> 6 - x), per the "(reverse)" note in the header.
   replace datavalue = 6 - datavalue if elementname == "Structured versus Unstructured Work"

   * One row per occupation and element (mean of the remaining ratings).
   collapse (mean) datavalue, by(onetsoccode elementname)

   * Remove the spaces, then keep only the last 10 characters of each element name:
   * short reshape suffixes that are distinct for the five elements kept above.
   replace elementname = subinstr(elementname, " ", "", .)
   gen temp = substr(elementname, -10, .)
   drop elementname
   reshape wide datavalue, i(onetsoccode) j(temp) string

   * cturedWork = Structured vs Unstructured (reversed), fEquipment = Pace by Equipment,
   * gSameTasks = Repeating Same Tasks, iveMotions = Repetitive Motions, orAccurate = Exact or Accurate
   rename (datavaluecturedWork datavaluefEquipment datavaluegSameTasks datavalueiveMotions datavalueorAccurate) (routinecogni3 routinemanual1 routinecogni1 routinemanual3 routinecogni2)
   save "version`v'_skills_routine1.dta", replace

   * --- Work Activities item: Controlling Machines and Processes ---
   import excel "`wa_file`v''", sheet("`wa_sheet`v''") firstrow case(lower) clear
   tab elementname
   keep if elementname == "Controlling Machines and Processes"
   tab scaleid
   keep if scaleid == "IM"  // keep the importance (IM) scale, for consistency with the other measures

   * Reduce to one row per occupation (mean of the remaining ratings).
   keep onetsoccode elementname datavalue
   collapse (mean) datavalue, by(onetsoccode)
   rename datavalue routinemanual2
   save "version`v'_skills_routine2.dta", replace
}
   

*   Measure additional: Required Education 
*-------------------------------------------------------------------------------
*version25.0
* Builds one row per O*NET occupation with the share-weighted years of required education.
import excel "$App_data\ONET N_version\db_25_0_excel\Education, Training, and Experience.xlsx", sheet("Education, Training, and Experi") firstrow case(lower) clear
   keep if elementname == "Required Level of Education"
   keep onetsoccode title category datavalue

* Replace the categorical code with "years of education".
* The number assigned to each category is its number of years of education;
* any code outside 1-12 (including missing) is left unchanged.
   recode category (1=10) (2=12) (3=13) (4=14) (5=15) (6=16) (7=17) (8 9 10=18) (11=19) (12=20)
   rename category educationyear

* "datavalue" is the percentage of people in each category for that occupation.
* The occupation's required education is the percentage-weighted sum of education years.
   bysort onetsoccode: egen education = total(educationyear*datavalue/100)

   * The weighted sum is constant within an occupation, so the mean just keeps one row per occupation.
   collapse (mean) education, by(onetsoccode)
   label variable education "Years of education LTHS 10, HS 12, BA 16, MS 18, PHD 19"
   sort onetsoccode
   save "version25_skills_education.dta", replace
   
/*==================================================
     3:CHN-consistent & characteristics (measure_index.do)
==================================================*/ 
*First of all, we need to combine all the measures within each version

*version 5, 15.1, 20.0, 25.0
* Start from the physical-skills file of each release and merge in every other
* measure file on onetsoccode (1:1). Release 25 additionally carries the education file.
local parts cognitive competition interact1_2 interact3 interact4 routine1 routine2

foreach v in 5 151 20 25 {
   use "version`v'_skills_physical.dta", clear

   * Only release 25 has an education file.
   local extra
   if "`v'" == "25" local extra education

   foreach p in `parts' `extra' {
      merge 1:1 onetsoccode using "version`v'_skills_`p'.dta", nogen
   }
   save "version`v'_skills", replace
}


*Then, we map each version's characteristics to the Chinese occupation categories.
*This needs two versions of the bridge: "ONETSOC2010" and "ONETSOC2000"

*Version25, version20, version15.1
* Attach each release's O*NET measures to the CHN-consistent / ONETSOC2010 bridge,
* average them within Chinese occupation, fill gaps and standardise.
* NOTE(review): the bridge file is read from a hard-coded F:\ path rather than from
* the path globals set at the top of this script - confirm the intended location.
foreach v in 25 20 151 {
   use "F:\2023\project\description of occupation\lab\CHN-consistent_ONETSOC2010.dta", clear

   rename onet_code onetsoccode
   merge m:1 onetsoccode using "version`v'_skills.dta"
   drop if _merge == 2   // drop O*NET occupations that have no row in the bridge

   * Only release 25 carries the education measure.
   local extra
   if "`v'" == "25" local extra education

   * The range physical2-routinemanual2 relies on the variable order produced by the merges.
   collapse (mean) physical2-routinemanual2 `extra', by(consistent title_consistent occ_1dig occ_2dig)

   foreach var of varlist physical2-routinemanual2 {
      * Fill missing values with the 2-digit group mean, then the 1-digit group mean,
      * then the overall mean; finally add a standardised copy (`var'_std).
      bysort occ_2dig: egen `var'_ad  = mean(`var')
      bysort occ_1dig: egen `var'_ad2 = mean(`var')
      replace `var' = `var'_ad  if `var' == .
      replace `var' = `var'_ad2 if `var' == .
      summarize `var'
      replace `var' = `r(mean)' if `var' == .
      drop `var'_ad `var'_ad2
      egen `var'_std = std(`var')
   }

   save "CHN_cons_to_SOC2010_v`v'.dta", replace
}


*version5.0
use "F:\2023\project\description of occupation\lab\CHN-consistent_ONETSOC2000.dta",clear

rename onetsoc2000code onetsoccode
merge m:1 onetsoccode using version5_skills.dta
drop if _m==2
collapse (mean) physical2-routinemanual2,by(consistent title_consistent occ_1dig occ_2dig)


foreach var of varlist physical2-routinemanual2 {
	bysort occ_2dig:egen `var'_ad=mean(`var')
	bysort occ_1dig:egen `var'_ad2=mean(`var')
	replace `var'=`var'_ad if `var'==.
	replace `var'=`var'_ad2 if `var'==.
	summarize `var'
	replace `var'=`r(mean)' if `var'==.
	drop `var'_ad `var'_ad2
	egen `var'_std=std(`var')
}

save CHN_cons_to_SOC2000_v5.dta,replace   
   
   
   
/*==================================================
     4:CHN-consistent & Chn_census structure
==================================================*/ 

*Generate the occupation structure (count and percentage share of workers per
*consistent occupation) for each census year.

foreach yr in "2000" "2010" "2015" {
	use "$Raw_data\census`yr'.dta", clear

	*Year-specific variable definitions and link to the consistent categories
	if `yr' == 2015 {
		gen age = 2015 - birth_year

		*Conditional on employed workers (occupation code is a string here)
		drop if occ == ""
		gen occ_`yr' = occ
		merge m:1 occ_`yr' using occ2015_consistent.dta
	}
	else {
		if `yr' == 2000 {
			*Conditional on employed workers; 0 = at school
			drop if occ == . | occ == 0
			local occvar occ
		}
		else {
			gen age = 2010 - _出生年

			*Conditional on employed workers
			drop if _职业 == .
			local occvar _职业
		}

		*Convert the numeric code to a string, prefixing "0" to codes below 100.
		*Both the 2000 and the 2010 census are linked through the occ2010 crosswalk.
		tostring `occvar', generate(occ_`yr')
		replace occ_`yr' = "0" + occ_`yr' if `occvar' < 100
		rename occ_`yr' occ2010
		merge m:1 occ2010 using occ2010_consistent.dta
	}

	*Keep only records matched to a consistent occupation
	keep if _merge == 3
	drop _merge

	*Labor in the market: ages 15 to 64
	keep if inrange(age, 15, 64)

	*Generate a new id
	gen id_new = _n

	*Number of workers in each occupation and its share (in percent) of the total
	collapse (count) id_new, by(consistent title_consistent)
	egen total = total(id_new)
	gen share = (id_new/total)*100

	gen year = `yr'

	compress
	rename (consistent title_consistent) (occ_number occ_name)

	save "structure_occ`yr'.dta", replace
}



*For each census year (2015, 2010, 2000), attach the characteristics of every
*O*NET version (15.1, 20.0, 25.0 and 5.0) to the occupation shares and save one
*file per census year and version: CHN<year>_<version>.
foreach yr in 2015 2010 2000 {
	use structure_occ`yr', clear
	keep occ_number occ_name share
	rename (occ_number occ_name) (consistent title_consistent)

	foreach ver in v151 v20 v25 v5 {
		*Version 5.0 was bridged through ONETSOC2000, all others through ONETSOC2010
		if "`ver'" == "v5" {
			local bridge SOC2000
		}
		else {
			local bridge SOC2010
		}

		*preserve/restore keeps the share data in memory for the next version
		preserve
		merge 1:1 consistent using CHN_cons_to_`bridge'_`ver'
		*Drop occupations that appear only in the characteristics file
		drop if _merge == 2
		drop _merge
		save CHN`yr'_`ver', replace
		restore
	}
}



**------------------------------------------------------------------------------
*Crosswalk between CHN-OCC2010 & CHN-OCC2015
*(3) Merge the CHN-consistent categories with the O*NET (version 25.0) characteristics
**------------------------------------------------------------------------------
use CHN_cons_to_SOC2010_v25, clear

*Move the _std suffix to a std_ prefix, then build each index as the simple
*average of its standardized components
rename *_std std_*
gen index1_competition = std_Competition
gen index2_interact = (std_interact1 + std_interact2 + std_interact3 + std_interact4)/4
gen index3_cognitive = (std_Cognitive1 + std_Cognitive2 + std_Cognitive3 + std_Cognitive4)/4
gen index4_physical = (std_physical2 + std_physical1)/2
gen index5_routinemanual = (std_routinemanual1+std_routinemanual2+std_routinemanual3)/3
gen index6_routinecognitive = (std_routinecogni1+std_routinecogni2+std_routinecogni3)/3

keep consistent title_consistent occ_1dig occ_2dig ///
	index1_competition index2_interact index3_cognitive ///
	index4_physical index5_routinemanual index6_routinecognitive ///
	education
save "consistent_v25characteristics_6index.dta", replace

*Attach the indices to the occ2015 and occ2010 codes, keeping matched codes only
foreach yr in 2015 2010 {
	use "occ`yr'_consistent.dta", clear
	merge m:1 consistent title_consistent using "consistent_v25characteristics_6index.dta"
	keep if _merge == 3
	drop _merge
	if `yr' == 2010 {
		*Store the 2010 code under the name occ_2010
		gen occ_2010 = occ2010
		drop occ2010
	}
	save "`yr'_occ_consistent_characteristics.dta", replace
}

*Get occ2000: consistent: index (the 2010 file with the code variable renamed)
use "2010_occ_consistent_characteristics.dta", clear
gen occ_2000 = occ_2010
drop occ_2010
save "2000_occ_consistent_characteristics.dta", replace


*Remove intermediate files that are no longer needed
erase consistent_v25characteristics_6index.dta

*Per-measure files of versions 5.0, 15.1, 20.0 and 25.0
foreach part in physical cognitive competition interact1_2 interact3 interact4 routine1 routine2 {
	foreach ver in 5 151 20 25 {
		erase version`ver'_skills_`part'.dta
	}
}

*Per-measure files of version 3.0
foreach part in cognitive physical {
	erase version3_skills_`part'.dta
}

erase version25_skills_education.dta

*Census occupation-structure files
foreach yr in 2000 2010 2015 {
	erase structure_occ`yr'.dta
}
**------------------------------------------------------------------------------
/*							Output files
			CHN2000*, CHN2010*, CHN2015*
			CHN_cons_to_SOC2010_v25
			CHN_cons_to_SOC2010_v20
			CHN_cons_to_SOC2010_v151
			CHN_cons_to_SOC2000_v5
			version5_skills
			version151_skills
			version20_skills
			version25_skills
*/
**------------------------------------------------------------------------------


log close






 
